This vignette reproduces the analysis in Figure 4 of Schraivogel, Gschwind, et al. 

Data

As whole transcriptome reference data for bone marrow cell types, we use the data by Baccin et al., Nature Cell Biology in press.

mkdir ../data
wget https://static-content.springer.com/esm/art%3A10.1038%2Fs41556-019-0439-6/MediaObjects/41556_2019_439_MOESM4_ESM.zip #https://nicheview.shiny.embl.de/RNAMagnetDataBundle.zip
unzip -j -d ../data 41556_2019_439_MOESM4_ESM.zip

rm 41556_2019_439_MOESM4_ESM.zip

We also download the count matrices for the targeted seq expeirments of total and c-Kit+ bone marrow. Creation of count matrices from raw files provided on GEO can be reproduced by running the rule bone_marrow_cell_types of the pipeline provided at https://github.com/argschwind/tapseq_manuscript

mkdir -p ../data/TAPtotalBM/downsampled
mkdir -p ../data/TAPkitBM/downsampled
mkdir -p ../data/WholeKitBM/downsampled
mkdir -p ../data/WholeTotalBM/downsampled

wget -O ../data/TAPtotalBM/dge.txt.gz http://steinmetzlab.embl.de/TAPdata/dge_TAPtotalBM.txt.gz
gunzip ../data/TAPtotalBM/dge.txt.gz
wget -O ../data/TAPkitBM/dge.txt.gz http://steinmetzlab.embl.de/TAPdata/dge_TAPkitBM.txt.gz
gunzip ../data/TAPkitBM/dge.txt.gz

Data were downsampeld at the level of raw reads to realistically simulate a lower sequencing depth. The downsampling can be likewise be reproduced from the files provided on GEO using the rule bone_marrow_cell_types of the pipeline provided; alternatively we provide the results:

wget http://steinmetzlab.embl.de/TAPdata/TAPkitBM.zip
wget http://steinmetzlab.embl.de/TAPdata/TAPtotalBM.zip
wget http://steinmetzlab.embl.de/TAPdata/WholeKitBM.zip
wget http://steinmetzlab.embl.de/TAPdata/WholeTotalBM.zip

unzip -j -d ../data/TAPkitBM/downsampled TAPkitBM.zip
unzip -j -d ../data/TAPtotalBM/downsampled TAPtotalBM.zip
unzip -j -d ../data/WholeKitBM/downsampled WholeKitBM.zip
unzip -j -d ../data/WholeTotalBM/downsampled WholeTotalBM.zip

rm TAPkitBM.zip TAPtotalBM.zip WholeKitBM.zip WholeTotalBM.zip

Compute environment

The following packages are required:

.libPaths("/g/steinmetz/velten/Software/RLibs-seurat3/")
require(Seurat)
require(ggplot2)
require(parallel)
require(plyr)
require(irr)
require(pheatmap)
require(mclust)

Setup of basic Seurat objects

For TAP-seq, raw count matrixes are loaded into R and processed into a Seurat object using default settings.

dir <- "../data"
files <- c("TAPkitBM/dge.txt", "TAPtotalBM/dge.txt")

DGE <-
  lapply(file.path(dir, files),
    function(x) read.table(gzfile(x),header = T,row.names = 1)
  )
names(DGE) <- gsub("dge_(.+)\\.txt.gz","\\1",files)
DGE <-
  lapply(names(DGE), function(n) {
    x <- DGE[[n]]
    colnames(x) <- paste(gsub("/dge.txt","",n), colnames(x), sep = "-")
    x
  })

common.genes <- rownames(DGE[[1]])
if (length(DGE) > 1)
  for (i in 2:length(DGE))
    common.genes <- intersect(common.genes, rownames(DGE[[i]]))

DGE <- lapply(DGE, function(x)
  x[common.genes,])
DGE <- do.call(cbind, DGE)


#try simple Seurat workflow 
TAPseq <- CreateSeuratObject(DGE, min.features = 10)
TAPseq <- NormalizeData(object = TAPseq)
TAPseq <- FindVariableFeatures(object = TAPseq)
TAPseq <- ScaleData(object = TAPseq)
TAPseq <- RunPCA(object = TAPseq)
TAPseq <- RunTSNE(object = TAPseq)
TAPseq <- FindNeighbors(object = TAPseq)
TAPseq <- FindClusters(object = TAPseq)
TAPseq$cluster <- Idents(TAPseq)

Reference data is simply loaded into R as a Seurat object. Only cell types abundantly present in c-Kit+ and total bone marrow are used for the reference.

load("../data/NicheData10x.rda")

Unsupervised analysis of downsampled data

The following function reads in the downsampled data and performs Seurat clustering as for the non-downsampled data set. Rand indeces are then computed to compare the different cluster partitions. Downsampling is performed such that the average reads per cell is sampled to a given amount.

UseCells <- subset(NicheData10x,metadata....experiment.. %in% c("2018_2_HSPC","2017_9_totalBM") )
UseCells$predicted.id <- Idents(UseCells)
UseCells <- FindVariableFeatures(UseCells)
UseCells <- RunPCA(UseCells)
UseCells <- FindNeighbors(UseCells)
UseCells <- FindClusters(UseCells)
newnames <- gsub("2018_2_HSPC_","WholeKitBM-",colnames(UseCells))
newnames <- gsub("2017_9_totalBM_","WholeTotalBM-",newnames)
names(newnames) <- colnames(UseCells)
UseCells <- RenameCells(UseCells, new.names = newnames)

source("functions/function_cellTypeDownsampling.unsupervised.R")

tap.average <-   getDS.wrapper("TAP","perSample","average", reference = TAPseq)

whole.average <- getDS.wrapper("Whole","perSample","average")

average <- rbind(tap.average, whole.average) #, whole.target.average
#average <- subset(average, !(reads > 2000 & experiment =="TAP"))
qplot(x = reads, y= kappa, color = experiment, data=subset(average, panel =="perSample"), log="x") + xlab("Average reads per cell") + ylab("Adjusted Rand Index") + theme_bw() + theme(panel.grid = element_blank(),legend.position = c(0.79,0.3), legend.background = element_rect(fill = NA)) + scale_color_discrete(name = "Method",labels = c("TAP"="TAP-seq", "Whole" = "10x")) + geom_smooth(se=F, size =0.5,linetype =2, span=2)

fit.tap <- loess(kappa ~ log10(reads), data= subset(average, panel =="perSample" & experiment == "TAP"), span=2)
fit.whole <- loess(kappa ~ log10(reads), data= subset(average, panel =="perSample"  & experiment == "Whole"), span=2)
lookup <- data.frame(reads = 10^seq(2,5.5,by=0.001),
                     kappa.tap = predict(fit.tap, newdata = data.frame(reads = 10^seq(2,5.5,by=0.001))),
                     kappa.whole = predict(fit.whole, newdata = data.frame(reads = 10^seq(2,5.5,by=0.001))))
cost <-data.frame(kappa = seq(0.6,0.8,by =0.001),
                  reads.tap = sapply(seq(0.6,0.8,by =0.001), function(x) lookup$reads[which.min(abs(lookup$kappa.tap - x))]),
                  reads.whole = sapply(seq(0.6,0.8,by =0.001), function(x) lookup$reads[which.min(abs(lookup$kappa.whole - x))]))
cost$savings <- cost$reads.whole / cost$reads.tap

qplot(x = kappa, y= savings, data=cost, geom="line") + xlab("Adjusted Rand Index") + ylab("Fold cost reduction") + theme_bw() + theme(panel.grid = element_blank()) + scale_y_continuous(limits = c(0,15))

Supervised analysis of downsampled data

In ther context of supervised analyses, we first need a clean annotation reference, which we obtain by subsetting the Baccin et al dataset to only contain celltypes with abundant presence in total and c-Kit+ bone marrow:

UseCells <- subset(NicheData10x, idents = c("Ery/Mk prog.","Neutro prog.","Mono prog.","Gran/Mono prog.","LMPPs","large pre-B.","Mk prog.","Erythroblasts","Eo/Baso prog.","Monocytes","Ery prog.","pro-B","T cells","Neutrophils","small pre-B.","Dendritic cells","NK cells","B cell"))

anchors <- FindTransferAnchors(reference = UseCells, query = TAPseq, dims = 1:15)
predictions <- TransferData(anchorset = anchors, refdata = Idents(UseCells), dims = 1:15)
TAPseq <- AddMetaData(object = TAPseq, metadata = predictions)
Idents(TAPseq) <- TAPseq$cluster

# 
mean_by_cluster <- lapply(as.character(unique(Idents(TAPseq))), function(x) {
  out <- data.frame( ge = apply(GetAssayData(TAPseq, slot = "data")[,Idents(TAPseq) == x],1,mean))
  out
})
mean_by_cluster <- do.call(cbind, mean_by_cluster)
colnames(mean_by_cluster) <- as.character(unique(Idents(TAPseq)))
metaclustering <- hclust(as.dist(1-cor(mean_by_cluster))) #create a pretty heatmap... for the identification of metaclusters


coldata <- data.frame(row.names = colnames(TAPseq),
                      id = factor(Idents(TAPseq), levels = metaclustering$labels[metaclustering$order]),
                      projected = TAPseq$predicted.id,
                      experiment = gsub("-.+","",colnames(TAPseq)))
ann_colors = list(projected = NicheDataColors[unique(as.character(coldata$projected))])
coldata <- coldata[order(coldata$id),]
require(pheatmap)
toplot <- GetAssayData(TAPseq, slot = "scale.data")[,rownames(coldata)]
toplot[toplot > 6] <- 6

pheatmap(toplot, cluster_cols = F, annotation_col = coldata,show_colnames = F, show_rownames = F, annotation_colors = ann_colors )

We then rename the clusters identified through unsupervised analyses of the TAP-seq dataset to match the names of the whole transcriptome dataset. Since T and NK cells are insufficiently sanpled in both datasets, these two cell types are merged.

Finally, again construct Seurat objects from the downsampled datasets, but this time, project on the non-downsampled reference.

source("functions/function_cellTypeDownsampling.supervised.R")

#create an annotated version of the TAPseq dataset
TAPseq$predicted.id <- factor(TAPseq$predicted.id)
ident.table <- sapply(as.character(unique(Idents(TAPseq))), function(id) table(TAPseq$predicted.id[Idents(TAPseq)==id]))
renamer <- rownames(ident.table)[apply(ident.table,2,which.max)]
names(renamer) <- colnames(ident.table)
#renamer["11"] <- "large pre-B."
#renamer["15"] <- "Ery prog."
TAPseq <- RenameIdents(TAPseq, renamer)
TAPseq <- RenameIdents(TAPseq, "NK cells" = "T cells")

UseCells <- RenameIdents(object = UseCells, "NK cells" = "T cells")


mean_by_cluster <- lapply(as.character(unique(Idents(UseCells))), function(x) {
  out <- data.frame( ge = apply(GetAssayData(UseCells, slot = "data")[,Idents(UseCells) == x],1,mean))
  out
})
mean_by_cluster <- do.call(cbind, mean_by_cluster)
colnames(mean_by_cluster) <- as.character(unique(Idents(UseCells)))
metaclustering <- hclust(as.dist(1-cor(mean_by_cluster))) #create a pretty heatmap... for the identification of metaclusters


tap.average <-   getDS.wrapper("TAP","perSample","average", reference = TAPseq)

whole.average <- getDS.wrapper("Whole","perSample","average")

average <- rbind(tap.average, whole.average) #, whole.target.average

Also project deeply sequenced data on itsself. Find out how many reads each cell from the original datasets has.

#project UseCells on itsself
self.anchors <- FindTransferAnchors(reference = UseCells, query = UseCells, dims = 1:15)
self.predictions <- TransferData(anchorset = self.anchors, refdata = Idents(UseCells), dims = 1:15)


self.k <- kappa2(
  data.frame(
    self.predictions$predicted.id,
    Idents(UseCells)
  )
)$value

reads.whole <- read.table(url("http://steinmetzlab.embl.de/TAPdata/WholeTotalBM.reads_per_cell_barcode.txt"))
rownames(reads.whole) <- paste0("2017_9_totalBM_", reads.whole$V2)
reads.kit <- read.table(url("http://steinmetzlab.embl.de/TAPdata/WholeKitBM.reads_per_cell_barcode.txt"))

rownames(reads.kit) <- paste0("2018_2_HSPC_", reads.kit$V2)
reads.whole <- rbind(reads.whole,reads.kit)


average <- rbind(average,
                 data.frame(experiment = "Whole",panel = "perSample", sampling = "average", kappa = self.k, reads = mean(reads.whole$V1[rownames(reads.whole) %in% colnames(UseCells)], na.omit = T), ncells = ncol(UseCells)))


#project TAPseq on itsself
self.anchors <- FindTransferAnchors(reference = TAPseq, query = TAPseq, dims = 1:15)
self.predictions <- TransferData(anchorset = self.anchors, refdata = Idents(TAPseq), dims = 1:15)


self.k <- kappa2(
  data.frame(
    self.predictions$predicted.id,
    Idents(TAPseq)
  )
)$value

reads.whole <- read.table(url("http://steinmetzlab.embl.de/TAPdata/TAPtotalBM.reads_per_cell_barcode.txt"))
rownames(reads.whole) <- paste0("TAPtotalBM-", reads.whole$V2)
reads.kit <- read.table(url("http://steinmetzlab.embl.de/TAPdata/TAPkitBM.reads_per_cell_barcode.txt"))
rownames(reads.kit) <- paste0("TAPkitBM-", reads.kit$V2)
reads.whole <- rbind(reads.whole,reads.kit)


average <- rbind(average,
                 data.frame(experiment = "TAP",panel = "perSample", sampling = "average", kappa = self.k, reads = mean(reads.whole$V1[rownames(reads.whole) %in% colnames(TAPseq)], na.omit = T), ncells = ncol(UseCells)))

Plot figure 4g

qplot(x = reads, y=100* kappa, color = experiment, data=subset(average, panel =="perSample" & reads != 5500), log="x") + xlab("Average reads per cell") + ylab("% correctly classifed cells") + theme_bw() + theme(panel.grid = element_blank(),legend.position = c(0.7,0.3),axis.title.y = element_text(size=10)) + scale_color_discrete(name = "Method",labels = c("TAP"="TAP-seq", "Whole" = "10x")) + geom_smooth(se=F, size =0.5,linetype =2)

Plot figure 4h

fit.tap <- loess(kappa ~ log10(reads), data= subset(average, panel =="perSample" & reads != 5500 & experiment == "TAP"))
fit.whole <- loess(kappa ~ log10(reads), data= subset(average, panel =="perSample" & reads != 5500 & experiment == "Whole"))
lookup <- data.frame(reads = 10^seq(2,5.5,by=0.001),
                     kappa.tap = predict(fit.tap, newdata = data.frame(reads = 10^seq(2,5.5,by=0.001))),
                     kappa.whole = predict(fit.whole, newdata = data.frame(reads = 10^seq(2,5.5,by=0.001))))
cost <-data.frame(kappa = seq(0.75,0.93,by =0.001),
                  reads.tap = sapply(seq(0.75,0.93,by =0.001), function(x) lookup$reads[which.min(abs(lookup$kappa.tap - x))]),
                  reads.whole = sapply(seq(0.75,0.93,by =0.001), function(x) lookup$reads[which.min(abs(lookup$kappa.whole - x))]))
cost$savings <- cost$reads.whole / cost$reads.tap

qplot(x = 100*kappa, y= savings, data=cost, geom="line") + xlab("% correctly classified cells") + ylab("Fold cost reduction") + theme_bw() + theme(panel.grid = element_blank()) + scale_y_continuous(limits = c(0,15)) + scale_x_continuous(limits = c(75,90))

Downsampled t-SNE

folders <- c("WholeKitBM/downsampled/","WholeTotalBM/downsampled/")
folders <- file.path(dir, folders)
Whole100 <- getDS(folders, "perSample","average",100, output = "seurat",minFeature = 10)
## perSample average 100 
## PC_ 1 
## Positive:  Car2, Rhd, Car1, C1qtnf12, Tubb5, Blvrb, Alad, Prdx2, Dut, Rrm2 
##     Tuba1b, Ctse, Hebp1, Tmem14c, Elane, Mpo, Hdgf, C1qbp, Cenpa, Anp32b 
##     Cpox, Glrx5, Pclaf, Gmnn, Ddx39, Eef1g, Gclm, Birc5, Tuba4a, Prtn3 
## Negative:  S100a9, S100a8, Ngp, Camp, Lcn2, Ltf, Ifitm6, Lyz2, Cd74, Pglyrp1 
##     Mmp8, Wfdc21, Anxa1, Chil3, Klf2, H2-Aa, Ly6d, Mcemp1, Ccl6, Tyrobp 
##     H2-Ab1, S100a11, Ccl5, I830127L07Rik, Lrg1, Lgals3, Mgst1, Slpi, Cd9, Trbc2 
## PC_ 2 
## Positive:  Ly6d, Cd74, Ighm, Vpreb3, Ebf1, Dusp2, Cd69, H2-Aa, Dnajc7, H2-Ab1 
##     Pafah1b3, Chchd10, Ptprcap, Sox4, Trbc2, Ctla2a, Mzb1, Car2, Cnp, Ifitm1 
##     Nfkbia, Cd72, Tspan13, Gimap6, Blnk, Arl5c, Ccl5, Tnfrsf13c, Tifa, Tsc22d1 
## Negative:  Ngp, Lyz2, Camp, S100a8, S100a9, Chil3, Ltf, Lcn2, Anxa1, Ifitm6 
##     Pglyrp1, Prdx5, Hp, Lgals3, Wfdc21, Tyrobp, I830127L07Rik, Mgst1, Fcnb, Cebpe 
##     Mcemp1, Trem3, Cybb, Lrg1, Msrb1, S100a11, Slpi, Ltb4r1, Ifitm3, S100a6 
## PC_ 3 
## Positive:  Mpo, Elane, Ctsg, Prtn3, Ms4a3, Nkg7, Gstm1, Clec12a, Ly6c2, Serpinb1a 
##     Fcgr3, Prss57, Cst7, Cd63, Gsr, Manf, Rgcc, Ifngr1, Ms4a6c, Cst3 
##     Ccl9, Igfbp4, F13a1, Gng12, Pgd, F630028O10Rik, Cuedc2, Hdc, Nars, Aprt 
## Negative:  Car2, Rhd, Blvrb, Car1, C1qtnf12, Ctse, Vpreb3, Hba-a1, Hbb-bt, Hebp1 
##     Klf2, Cd24a, Ighm, Hbb-bs, S100a9, Ngp, Chchd10, Cpox, Camp, Cd74 
##     Gypa, Ly6d, Ltf, S100a8, Alad, Ifitm6, Tspan13, Smim1, Ebf1, Lcn2 
## PC_ 4 
## Positive:  Camp, Ltf, Ngp, Lcn2, S100a9, S100a8, Car2, Car1, C1qtnf12, Wfdc21 
##     Pglyrp1, Rhd, Anxa1, Cebpe, Cd63, Vamp5, Ctse, Mpo, Mt1, Trem3 
##     Gclm, Blvrb, Fcnb, Elane, Lrg1, Ms4a3, Hebp1, Ctsg, Gatm, Aqp1 
## Negative:  Fos, Ctss, Psap, Dusp1, Atf3, Lgals3, Ctsh, Klf2, Ms4a6c, Ifitm3 
##     Crip1, Pld4, Irf8, Cst3, Ighm, Ctsc, Tmsb10, Ms4a4c, Wfdc17, Ctsb 
##     Hspa1a, Ccl6, Tyrobp, Cd74, Cebpb, Ccl3, S100a10, Ccl9, F13a1, Mpeg1 
## PC_ 5 
## Positive:  Ifitm3, Ccl6, Car1, Car2, Rhd, Lgals3, Atf3, Cebpb, Blvrb, Apoe 
##     Lyz2, C1qtnf12, Ccl3, Wfdc17, Ctse, Ctss, Tyrobp, Dusp1, Hebp1, Hspa1a 
##     Ms4a6c, Fos, Psap, Ms4a4c, Hba-a1, Hbb-bs, S100a6, Ccr2, Ctsc, Ccl4 
## Negative:  Vpreb3, Ighm, Igll1, Chchd10, Vpreb1, Ebf1, Tifa, Pafah1b3, Myl4, Mzb1 
##     Cd24a, Dnajc7, Blnk, Ptprcap, Lrmp, Dusp2, Cd72, Cnp, Elof1, Ly6d 
##     H2afx, Fcnb, Pclaf, Arpc5l, Arl5c, Arl6ip1, Fam129c, Ube2c, Camp, Ccnb2
Whole100 <- RunTSNE(Whole100, dims = 1:15)
plf <- data.frame(row.names = colnames(Whole100),
                  x = Embeddings(Whole100,"tsne")[,1],
                  y = Embeddings(Whole100,"tsne")[,2])
include <- colnames(UseCells)
include <- gsub("2018_2_HSPC_","WholeKitBM-",include)
include <- gsub("2017_9_totalBM_","WholeTotalBM-",include)
id <- Idents(UseCells); names(id) <- include
plf <- plf[intersect(include,rownames(plf)),]
plf$id <- id[rownames(plf)]
qplot(x=x,y=y,color=id,data=plf, size=I(0.5)) + scale_color_manual(values=NicheDataColors, guide=F) + theme_bw() + theme(panel.grid = element_blank(), axis.title = element_blank(),axis.text = element_blank(),axis.ticks = element_blank())

folders <- c("TAPkitBM/downsampled/", "TAPtotalBM/downsampled/")
folders <- file.path(dir, folders)
TAP100 <- getDS(folders, "perSample","average",100, output = "seurat", ncells = 4397)
## perSample average 100 
## PC_ 1 
## Positive:  Elane, Prtn3, Mpo, Ctsg, Ms4a3, Slpi, Gstm1, Ly6c2, Cst7, Lgals1 
##     Emb, BC035044, Alas1, Sdf2l1, Prss57, Calr, F13a1, Cd34, Glipr1, Pdia6 
##     Mpeg1, Csf1r, Nkg7, Irf8, Csf3r, Mgl2, Ly86, Cebpa, Ifitm2, Tmem176b 
## Negative:  Car1, Ctse, Cldn13, Hba-a1, Rhd, Klf1, Hba-a2, Hebp1, Egr1, Phlda1 
##     Epdr1, Nfkbia, Gata1, Prss50, Gypa, Apoe, Cd79a, Btg2, Tal1, Cpox 
##     Irf4, Hmbs, Spib, Chchd10, Ctla2a, Cd19, Vpreb3, Gata2, Tsc22d1, Cnp 
## PC_ 2 
## Positive:  S100a6, Mpeg1, Lgals3, Arl5c, Spib, Cd79a, Pld4, Cd74, Irf4, F13a1 
##     Clec4a3, Ly86, Csf1r, Vpreb3, Cd19, Cd79b, Ifitm6, Irf8, Ly6c2, H2-Eb1 
##     Siglecg, Myl4, Atf3, Ctss, Ccr2, Ms4a1, Igll1, Vpreb1, Lcn2, Tifa 
## Negative:  Srm, Hspd1, Lmo2, Nkg7, Calr, Mpo, Egr1, Ctla2a, Cd34, Kit 
##     Prtn3, Car1, Mt1, Ifitm2, Ifitm1, Ctsg, Glrx5, Hmgb1, Tmem176b, Hebp1 
##     Ctse, Ms4a3, Hmbs, Tal1, Cst7, Gata2, Tmem14c, Meis1, Klf1, Gata1 
## PC_ 3 
## Positive:  Ctla2a, Nr4a1, Ifitm1, Tmem176b, Meis1, Egr1, Myct1, Cd34, Gpr171, Gata2 
##     Itga2b, Adgrl4, Pbx1, Pf4, Apoe, Ccl4, Tsc22d1, Btg2, Mpl, Nkg7 
##     Flt3, Ifitm2, Gp1bb, Cpa3, Nrgn, Lmo4, Ms4a2, Zc3h12a, Vwf, Kit 
## Negative:  Cldn13, Ctse, Hba-a1, Rhd, Hebp1, Hba-a2, Car1, Mt1, Klf1, Hmbs 
##     Gypa, Cpox, Tfrc, Ly6c2, Elane, Tmem14c, Slpi, Prss50, Epdr1, Glrx5 
##     Ms4a3, F13a1, Gata1, Alas1, Mpeg1, Chchd10, Gstm1, Ctsg, Csf1r, Mgl2 
## PC_ 4 
## Positive:  S100a6, Lgals3, Ifitm3, Mpeg1, Ifitm2, Clec4a3, F13a1, Csf1r, Pld4, Nr4a1 
##     Atf3, Ifitm6, Ccr2, Plaur, Apoe, Ly86, Ctss, Ly6c2, Ms4a6c, Btg2 
##     Ctla2a, Egr1, Slc11a1, Pf4, Retnlg, Ccl4, Pbx1, Slpi, Car1, Lcn2 
## Negative:  Cd79a, Irf4, Vpreb3, Spib, Cd79b, Cd19, Myl4, Vpreb1, Igll1, Chchd10 
##     Siglecg, Cnp, Ms4a3, Elane, Ctsg, Mpo, Vpreb2, Ms4a1, Bfsp2, Prss57 
##     Pafah1b3, Prtn3, Arl5c, BC035044, Hmgb1, Cst7, Fcrla, Calr, Nkg7, Alas1 
## PC_ 5 
## Positive:  Irf8, BC035044, Gpr171, Cd34, Emb, Hspd1, Lgals1, Csf1r, Ly86, Tsc22d1 
##     Mpeg1, F13a1, Flt3, Glipr1, Tmem14c, Lmo2, Tmem176b, Nr4a1, Pld4, Ccl4 
##     Hmgb1, Chchd10, Sdc1, Glrx5, Tifa, Cnp, Arl5c, Srm, Ms4a6c, Ifitm2 
## Negative:  Lcn2, Pglyrp1, Wfdc21, Cebpe, Fcnb, Retnlg, Vcam1, Ifitm6, Cd63, Ms4a3 
##     Nkg7, Prss57, Plscr1, Ms4a2, Cpa3, Alas1, Lmo4, Gata2, Apoe, Itga2b 
##     Mgl2, Pbx1, Phlda1, Pf4, Plaur, Fcer1a, Cst7, Rgcc, Gp1bb, Ap3s1
TAP100 <- RunTSNE(TAP100, dims = 1:15)
plf2 <- data.frame(x = Embeddings(TAP100,"tsne")[,1],
                   y = Embeddings(TAP100,"tsne")[,2],
                   id = TAP100$predicted.id)

qplot(x=x,y=y,color=id,data=plf2, size=I(0.5)) + scale_color_manual(values=NicheDataColors, guide=F) + theme_bw() + theme(panel.grid = element_blank(), axis.title = element_blank(),axis.text = element_blank(),axis.ticks = element_blank())